Taxa bar plot - core taxa only

Packages

pacman::p_load(
  dplyr,
  tidyverse,
  readxl,
  vegan,
  reshape2,
  RColorBrewer,
  data.table,
  grid,
  ggh4x,
  ggsci)
# setwd("D:PhD/01_Data/03_Vitro/02_Identification of glycan utilizing microbe") # Windows
setwd("/Volumes/Yiming_Wang/PhD/01_Data/03_Vitro/02_Identification of glycan utilizing microbe") # Mac

df_all <- read_excel("Glycan utilization_L6_taxa formatted.xlsx") %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(Genotype_Sex = factor(Genotype_Sex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  filter(Genotype == "WT") %>% 
  mutate(Sample = row_number()) %>% 
  mutate(Treatment = ifelse(Treatment == "No glycan", "mBasal", "mBasal + 2'-FL")) %>% 
  mutate(Treatment_Sex= paste0(Treatment,"_", Sex)) %>% 
  relocate(Sample, Treatment_Sex)

# Change 1 to 01
df_all$Sample <- sprintf("%02d", as.numeric(df_all$Sample)) 
df_all<- df_all %>% 
  mutate(Sample = factor(paste0("Sample", " ",df_all$Sample)))

Shape data

core taxa

df_presence <- df_all %>%  
  gather(Taxa, Abundance, -c(1:15)) %>% 
  select(1,2,3,4,5,6,7,8,16,17) %>% 
  filter(Abundance != 0) %>% # save the taxa of which abundance is not equal to 0
  group_by(Taxa) %>% 
  summarise(n=n_distinct(OTU_ID)) %>% #get number of samples that have non-zero abundance bacteria as grouped by bacteria# 
  mutate(n.sample=nrow(df_all)) %>% 
  mutate(p=n/n.sample)

df_presence_core <- df_presence %>% 
  filter (p > 0.95)

df_abundance <- df_all %>%
  gather(Taxa, Abundance, -c(1:15)) %>% # transform to long table
  select(1,3,4,6,9,10,16,17) %>%
  group_by(Taxa) %>% 
  summarise(mean_abundance=mean(Abundance,na.rm=T),
            median_abundance = median(Abundance,na.rm=T))

df_abundance_core <- df_abundance %>%
  filter(mean_abundance>0.0001) 

df_all_core <- left_join(df_presence_core, df_abundance_core, by="Taxa")

df_for_figure <- df_all %>% 
  gather(Taxa, Abundance, -c(1:15)) %>% 
  mutate(Taxa = ifelse(Taxa %in% df_all_core$Taxa, Taxa, "Non-core taxa")) %>% 
  group_by(Sample, OTU_ID, Genotype, Sex, Genotype_Sex, Treatment, Taxa) %>%
  summarise(Abundance = sum(Abundance)) %>%
  spread(Taxa, Abundance)

shape data frame for core taxa

#Create dataset with just headings and all taxa data
all_taxa1 <- df_for_figure %>% subset(select = c(1,7:ncol(df_for_figure))) 


#Create dataset with just headings and all metadata
all_taxa1_names <- df_for_figure[,1:6]


#"Melt" data so it becomes a long list of each cell
all_taxa2<- melt(all_taxa1, id = c("Sample"))
all_taxa2$Sample <- factor(all_taxa2$Sample, levels=unique(all_taxa2$Sample)) #factor the ID column

#Add metadata to melt 
all_taxa2_names <- left_join(all_taxa2, all_taxa1_names, by="Sample")

Figure

#Generate bar plot with separate lengend
# Choose the most abundant taxa as the first bar and decedent the samples
order<-all_taxa2_names%>%
  filter(variable == "Escherichia Shigella")%>%
  arrange(desc(value))

# Factor your oder
order$Sample<-factor(order$Sample,levels=unique(order$Sample))

# Apply the factor level to your original data
all_taxa2_names$Sample<-factor(all_taxa2_names$Sample,levels = order$Sample)

# Define color
mypal = pal_simpsons("springfield", alpha = 0.7)(7)
mypal
## [1] "#FED439B2" "#709AE1B2" "#8A9197B2" "#D2AF81B2" "#FD7446B2" "#D5E4A2B2"
## [7] "#197EC0B2"
library("scales")
show_col(mypal)

set.seed(860) #860
mypal<-sample(mypal)
show_col(mypal)

# Draw bar plot
all_bar <- ggplot(all_taxa2_names, aes(x = Sample, fill = reorder(variable,-value), y = value))+ #reorder taxa based on their relative abundance
  geom_bar(position="fill", stat="identity", linetype = 1, width = 0.7, colour = 'black',size = 0.3)+ #can replace colour = "black" with linetype = 0 for no outline
  theme(axis.title = element_text(face="plain", size = 10),
        axis.title.x=element_text(size =10),
        axis.title.y=element_text(size =20, margin = margin(r = 5)),
        axis.text.x=element_blank(), 
        axis.text.y=element_text(colour="black", face="plain", size=15),
        axis.ticks.x=element_blank(),
        plot.title = element_text(size=10, hjust=0.5, face="plain"),
        legend.position = "none",
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
  guides(fill=guide_legend(ncol=1))+
  scale_y_continuous(expand = c(0,0),labels=scales::percent) + 
  labs(x = "", y = "Relative Abundance (%)", fill = "OTU")+
  scale_fill_manual(values = c(mypal)) + 
  facet_grid(cols = vars(Treatment),scales = "free") + theme(strip.text = element_text(colour = 'white', size = rel(2)))
  
 all_bar  

#Just plot the legend
## Three columns
all_bar_legend_set <- ggplot(all_taxa2_names, aes(x = Sample, fill = reorder(variable,-value), y = value))+
  geom_bar(position="fill", stat="identity", linetype = 1, width = 0.8, color = "black", size = 0.3)+
  theme(legend.title = element_text(size = 8, face = "bold"), legend.text = element_text(size = 6, face = "bold", colour = "black"), legend.key.size = unit(0.2, "cm"))+
  scale_fill_manual(values = c(mypal))+
  guides(fill=guide_legend(ncol=3))


all_bar_legend <- cowplot::get_legend(all_bar_legend_set)
grid.newpage()
grid.draw(all_bar_legend)